library(datasets)
# Histograms of engine displacement (mtcars$disp): default binning first,
# then a finer-grained, filled version.
hist(mtcars$disp)
hist(mtcars$disp, col = "Green", breaks = 100)

# Same fine-grained histogram, with densities on the y-axis instead of
# raw counts.
hist(mtcars$disp, col = "Green", breaks = 100, freq = FALSE)

# Kernel density estimate of miles per gallon. The estimate is stored so
# it can be handed to plot(), which knows how to draw density objects.
d <- density(mtcars$mpg)
plot(d)

# Filled density plot: re-estimate, draw with a custom title, then overlay
# a filled polygon that traces the same curve.
d <- density(mtcars$mpg)
plot(d, main = "Kernel Density of Miles Per Gallon")
polygon(d, border = "blue", col = "red")
# Scatter plot of state population (column 1 of state.x77) against
# income (column 2).
plot(state.x77[, 1], state.x77[, 2])

# Plot title: `main`
plot(state.x77[, 1], state.x77[, 2], main = "Population vs Income")

# Axis labels: `xlab` and `ylab`
plot(state.x77[, 1], state.x77[, 2],
     xlab = "Population", ylab = "Income")

# Colour given as a number
plot(state.x77[, 1], state.x77[, 2],
     xlab = "Population", ylab = "Income",
     col = 2)

# Colour given by name
plot(state.x77[, 1], state.x77[, 2],
     xlab = "Population", ylab = "Income",
     col = "blue")

# Point symbol: `pch`
plot(state.x77[, 1], state.x77[, 2],
     xlab = "Population", ylab = "Income",
     col = 3, pch = 20)

# Symbol size: `cex` (smaller symbols with 0.8)
plot(state.x77[, 1], state.x77[, 2],
     xlab = "Population", ylab = "Income",
     col = 3, pch = 20, cex = 0.8)

# Symbol size: `cex` (larger symbols with 1.8)
plot(state.x77[, 1], state.x77[, 2],
     xlab = "Population", ylab = "Income",
     col = 3, pch = 20, cex = 1.8)
# Lines only: type = "l"
plot(state.x77[, 1], state.x77[, 2],
     xlab = "Population", ylab = "Income",
     col = 3, type = "l")

# Points joined by lines: type = "b"
plot(state.x77[, 1], state.x77[, 2],
     xlab = "Population", ylab = "Income",
     col = 3, type = "b")

# Line type: `lty`
plot(state.x77[, 1], state.x77[, 2],
     xlab = "Population", ylab = "Income",
     col = 3, type = "b", lty = 2)

# A different line type
plot(state.x77[, 1], state.x77[, 2],
     xlab = "Population", ylab = "Income",
     col = 3, type = "b", lty = 4)

# Line width: `lwd`
plot(state.x77[, 1], state.x77[, 2],
     xlab = "Population", ylab = "Income",
     col = 3, type = "b", lty = 4, lwd = 2)
# abline(): add straight reference lines to an existing plot.
# First a horizontal line at y = 4000 and a vertical line at x = 7000.
plot(state.x77[, 1], state.x77[, 2],
     xlab = "Population", ylab = "Income", col = 3)
abline(h = 4000, col = "red")
abline(v = 7000, col = "blue", lwd = 4, lty = 3)

# Then the fitted line of a linear regression of income on population;
# abline() takes the intercept and slope from the fitted model.
plot(state.x77[, 1], state.x77[, 2],
     xlab = "Population", ylab = "Income", col = 3)
model <- lm(state.x77[, 2] ~ state.x77[, 1])
abline(model, lty = 3, lwd = 2)
# Labelled scatter plot: mileage against car weight, with each point
# annotated by its row name from mtcars (pos = 4 puts the label to the
# right of the point).
plot(mtcars$wt, mtcars$mpg,
     main = "Mileage vs. Car Weight",
     xlab = "Weight", ylab = "Mileage",
     col = "blue", pch = 18)
text(mtcars$wt, mtcars$mpg, row.names(mtcars),
     col = "red", pos = 4, cex = 0.6)
## type= values
# Demonstrate every value of the `type` argument, one panel per value.
x <- 1:5
y <- x  # create some data

# Plotting symbol, colour and a 2 x 4 grid so all panels share one page.
# par() returns the previous settings, which are restored after the loop
# so that later plots are not drawn in red inside a 2 x 4 grid.
old_par <- par(pch = 22, col = "red", mfrow = c(2, 4))

opts <- c("p", "l", "o", "b", "c", "s", "S", "h")
for (i in seq_along(opts)) {
  heading <- paste("type=", opts[i])
  plot(x, y, type = "n", main = heading)  # empty frame with the title
  lines(x, y, type = opts[i])             # draw the data in this style
}

par(old_par)
library(datasets)
# One box per column of state.x77, first on the raw values and then on
# the columns after scale() has standardised them.
boxplot(state.x77)
boxplot(scale(state.x77))

# Population only, with the title added afterwards via title().
boxplot(state.x77[, 1], ylab = "Population")
title("Boxplot of State Populations")

# Formula interface: MPG grouped by number of cylinders.
boxplot(mpg ~ cyl,
        data = mtcars,
        main = "Car Milage Data",
        xlab = "Number of Cylinders",
        ylab = "Miles Per Gallon")
# Row-wise layout: a 2 x 1 grid, so the two plots are stacked.
par(mfrow = c(2, 1))
plot(state.x77[, 1], state.x77[, 2],
     xlab = "Population", ylab = "Income", col = 3)
plot(state.x77[, 1], ylab = "Population")

# Column-wise layout: a 1 x 2 grid filled by column. Three plots are drawn
# into the two panels, so the third one starts a new page.
par(mfcol = c(1, 2))
plot(state.x77[, 1], state.x77[, 2],
     xlab = "Population", ylab = "Income", col = 3)
plot(state.x77[, 1], ylab = "Population")
plot(state.x77[, 2], ylab = "Income")

# Back to one plot per page.
par(mfcol = c(1, 1))
## matplot
# Print the quarterly JohnsonJohnson series (expected output follows).
JohnsonJohnson
## Qtr1 Qtr2 Qtr3 Qtr4
## 1960 0.71 0.63 0.85 0.44
## 1961 0.61 0.69 0.92 0.55
## 1962 0.72 0.77 0.92 0.60
## 1963 0.83 0.80 1.00 0.77
## 1964 0.92 1.00 1.24 1.00
## 1965 1.16 1.30 1.45 1.25
## 1966 1.26 1.38 1.86 1.56
## 1967 1.53 1.59 1.83 1.86
## 1968 1.53 2.07 2.34 2.25
## 1969 2.16 2.43 2.70 2.25
## 1970 2.79 3.42 3.69 3.60
## 1971 3.60 4.32 4.32 4.05
## 1972 4.86 5.04 5.04 4.41
## 1973 5.58 5.85 6.57 5.31
## 1974 6.03 6.39 6.93 5.85
## 1975 6.93 7.74 7.83 6.12
## 1976 7.74 8.91 8.28 6.84
## 1977 9.54 10.26 9.54 8.73
## 1978 11.88 12.06 12.15 8.91
## 1979 14.04 12.96 14.85 9.99
## 1980 16.20 14.67 16.02 11.61
# JohnsonJohnson is a time-series object, not a matrix.
class(JohnsonJohnson)
## [1] "ts"
# Lay the series out row by row into four columns (one per quarter, one
# row per year), then print the result.
m <- matrix(JohnsonJohnson, byrow = TRUE, ncol = 4)
m
## [,1] [,2] [,3] [,4]
## [1,] 0.71 0.63 0.85 0.44
## [2,] 0.61 0.69 0.92 0.55
## [3,] 0.72 0.77 0.92 0.60
## [4,] 0.83 0.80 1.00 0.77
## [5,] 0.92 1.00 1.24 1.00
## [6,] 1.16 1.30 1.45 1.25
## [7,] 1.26 1.38 1.86 1.56
## [8,] 1.53 1.59 1.83 1.86
## [9,] 1.53 2.07 2.34 2.25
## [10,] 2.16 2.43 2.70 2.25
## [11,] 2.79 3.42 3.69 3.60
## [12,] 3.60 4.32 4.32 4.05
## [13,] 4.86 5.04 5.04 4.41
## [14,] 5.58 5.85 6.57 5.31
## [15,] 6.03 6.39 6.93 5.85
## [16,] 6.93 7.74 7.83 6.12
## [17,] 7.74 8.91 8.28 6.84
## [18,] 9.54 10.26 9.54 8.73
## [19,] 11.88 12.06 12.15 8.91
## [20,] 14.04 12.96 14.85 9.99
## [21,] 16.20 14.67 16.02 11.61
# Plot every column of m against the row index, one line per column.
matplot(m, type = "l")
## Q-Q Plots
# Normal Q-Q plot of a sample of 100 standard-normal draws, with the
# reference line added by qqline().
x1 <- rnorm(100)
qqnorm(x1)
qqline(x1)

# Comparing 2 distributions: a sample from a t distribution with 3 degrees
# of freedom, checked against a normal fit and against t(3) quantiles in
# two side-by-side panels. The previous layout is restored at the end.
old_par <- par(mfrow = c(1, 2))
x <- rt(100, df = 3)
# normal fit
qqnorm(x)
qqline(x)
# t(3Df) fit
qqplot(rt(1000, df = 3), x,
       main = "t(3) Q-Q Plot",
       ylab = "Sample Quantiles")
abline(0, 1)  # identity line: intercept 0, slope 1
par(old_par)
# Interpreting Q-Q plots: http://stats.stackexchange.com/a/101290/21450
# Many base plotting functions share a set of parameters. A few key ones
# used above: main, xlab, ylab, col, pch, cex, type, lty and lwd.
# References:
## ggplot2
library(ggplot2)

# Basic scatter plot: mpg against weight, with a title and axis labels.
ggplot(data = mtcars, aes(x = wt, y = mpg)) +
  geom_point() +
  labs(title = "Automobile Data", x = "Weight", y = "Miles Per Gallon")
## Geometric objects
# Colour mapped to cyl as a numeric variable, with larger points.
ggplot(mtcars, aes(x = wt, y = mpg, color = cyl)) +
  geom_point(size = 5) +
  labs(title = "Automobile Data", x = "Weight", y = "Miles Per Gallon")

# Colour mapped to cyl converted to a factor.
ggplot(mtcars, aes(x = wt, y = mpg, color = factor(cyl))) +
  geom_point() +
  labs(title = "Automobile Data", x = "Weight", y = "Miles Per Gallon")

# Same mapping, but a fixed colour is also set directly on the point layer.
ggplot(mtcars, aes(x = wt, y = mpg, color = factor(cyl))) +
  geom_point(color = "red") +
  labs(title = "Automobile Data", x = "Weight", y = "Miles Per Gallon")
# Use the diamonds dataset and plot this image.
library(lattice)
# Histogram of singer heights with the default binning; ggplot2 reports
# the default it chose, as shown in the message below.
ggplot(data = singer, mapping = aes(x = height)) +
  geom_histogram()
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

# Same histogram with an explicit bin width.
ggplot(data = singer, mapping = aes(x = height)) +
  geom_histogram(binwidth = 2)

# Height distribution per voice part as box plots.
ggplot(data = singer, mapping = aes(x = voice.part, y = height)) +
  geom_boxplot()
# Recent versions of car ship their data sets in the separate carData
# package, where data(Salaries, package = "car") no longer finds anything.
# Load from carData when it is installed and fall back to car otherwise.
salaries_pkg <- if (requireNamespace("carData", quietly = TRUE)) {
  "carData"
} else {
  "car"
}
data(Salaries, package = salaries_pkg)

# Bar charts of rank, filled by sex, with the three `position` settings.
ggplot(Salaries, aes(x = rank, fill = sex)) +
  geom_bar(position = "stack") +
  labs(title = 'position="stack"')

ggplot(Salaries, aes(x = rank, fill = sex)) +
  geom_bar(position = "dodge") +
  labs(title = 'position="dodge"')

ggplot(Salaries, aes(x = rank, fill = sex)) +
  geom_bar(position = "fill") +
  labs(title = 'position="fill"')

# The "fill" chart again, with the axes flipped.
ggplot(Salaries, aes(x = rank, fill = sex)) +
  geom_bar(position = "fill") +
  labs(title = 'position="fill"') +
  coord_flip()
# Print the quarterly JohnsonJohnson series again (output reproduced below).
JohnsonJohnson
## Qtr1 Qtr2 Qtr3 Qtr4
## 1960 0.71 0.63 0.85 0.44
## 1961 0.61 0.69 0.92 0.55
## 1962 0.72 0.77 0.92 0.60
## 1963 0.83 0.80 1.00 0.77
## 1964 0.92 1.00 1.24 1.00
## 1965 1.16 1.30 1.45 1.25
## 1966 1.26 1.38 1.86 1.56
## 1967 1.53 1.59 1.83 1.86
## 1968 1.53 2.07 2.34 2.25
## 1969 2.16 2.43 2.70 2.25
## 1970 2.79 3.42 3.69 3.60
## 1971 3.60 4.32 4.32 4.05
## 1972 4.86 5.04 5.04 4.41
## 1973 5.58 5.85 6.57 5.31
## 1974 6.03 6.39 6.93 5.85
## 1975 6.93 7.74 7.83 6.12
## 1976 7.74 8.91 8.28 6.84
## 1977 9.54 10.26 9.54 8.73
## 1978 11.88 12.06 12.15 8.91
## 1979 14.04 12.96 14.85 9.99
## 1980 16.20 14.67 16.02 11.61
# Build a data frame with one row per year: a column of years 1960-1980
# bound in front of the series laid out row by row into four columns.
jj <- cbind(
  matrix(1960:1980),
  matrix(JohnsonJohnson, byrow = TRUE, ncol = 4)
)
colnames(jj) <- c("Year", "Q1", "Q2", "Q3", "Q4")
jj <- data.frame(jj)
# Line plot of the first quarter only.
ggplot(jj, aes(x = Year, y = Q1)) +
  geom_line()

# Reshaping example: melt() turns the four quarter columns into
# `variable` / `value` pairs keyed by Year, so a single ggplot call can
# draw one coloured line per quarter.
library(reshape2)
melt_jj <- melt(jj, id.vars = "Year")
ggplot(melt_jj, aes(x = Year, y = value, color = variable)) +
  geom_line()
# Recent versions of car ship their data sets in the separate carData
# package, where data(Salaries, package = "car") no longer finds anything.
# Load from carData when it is installed and fall back to car otherwise.
salaries_pkg <- if (requireNamespace("carData", quietly = TRUE)) {
  "carData"
} else {
  "car"
}
data(Salaries, package = salaries_pkg)
library(ggplot2)

# Density of salary over all rows.
ggplot(Salaries, aes(x = salary)) +
  geom_density(alpha = 0.3)

# One semi-transparent density per rank.
ggplot(Salaries, aes(x = salary, fill = rank)) +
  geom_density(alpha = 0.3)

# Salary against years since PhD, with sex mapped to point shape and rank
# mapped to colour.
ggplot(Salaries, aes(x = yrs.since.phd, y = salary, shape = sex, color = rank)) +
  geom_point(size = 5)
## facet_wrap
data(singer, package = "lattice")
library(ggplot2)

# One height histogram per voice part, with the panels wrapped into 4 rows.
ggplot(data = singer, aes(x = height)) +
  geom_histogram() +
  facet_wrap(~voice.part, nrow = 4)
## facet_grid
# Work on a copy of mtcars and turn the 0/1 codes in am and vs, and the
# cylinder counts, into factors so they can be used as facets and
# discrete aesthetics.
m <- mtcars
m$am <- factor(m$am, levels = c(0, 1), labels = c("Automatic", "Manual"))
m$vs <- factor(m$vs, levels = c(0, 1), labels = c("V-Engine", "Straight Engine"))
m$cyl <- factor(m$cyl)
# mpg against horsepower, with cyl shown by both shape and colour, in a
# grid of panels: am defines the rows and vs the columns.
ggplot(m, aes(x = hp, y = mpg, shape = cyl, color = cyl)) +
  geom_point(size = 3) +
  facet_grid(am ~ vs) +
  labs(
    title = "Automobile Data by Engine Type",
    x = "Horse Power",
    y = "Miles Per Gallon"
  )
# Install the gridExtra package first, e.g. install.packages("gridExtra").
library(gridExtra)
# Two versions of the same weight-vs-mpg scatter plot that differ only in
# the smoothing method: a linear fit (p1) and a loess fit (p2).
p1 <- ggplot(mtcars, aes(x = wt, y = mpg, color = factor(cyl))) +
  geom_point(color = "blue", size = 2, pch = 17) +
  geom_smooth(color = "red", linetype = 2, method = "lm") +
  labs(title = "Automobile Data", x = "Weight", y = "Miles Per Gallon")

p2 <- ggplot(mtcars, aes(x = wt, y = mpg, color = factor(cyl))) +
  geom_point(color = "blue", size = 2, pch = 17) +
  geom_smooth(color = "red", linetype = 2, method = "loess") +
  labs(title = "Automobile Data", x = "Weight", y = "Miles Per Gallon")

# Arrange both plots on one page: first in two columns, then in two rows.
grid.arrange(p1, p2, ncol = 2)
grid.arrange(p1, p2, nrow = 2)
# Scatter plot of horsepower against weight. No plot is passed to ggsave()
# below, so it writes the most recent plot; the output format follows the
# file extension.
ggplot(mtcars, aes(x = wt, y = hp)) +
  geom_point()

ggsave(filename = "myplot.pdf")
## Saving 7 x 5 in image
ggsave(filename = "myplot.png")
## Saving 7 x 5 in image